library(Seurat)
library(tidyverse)
library(magrittr)
library(ggplot2)
library(patchwork)
library(sctransform)
library(cowplot)
library(harmony)#Prior to load the data, please make sure the gene feature names shoud use uppercases (especiall for mouse).
samples <- list.files("/Users/zhuliu/Desktop/scRNA_STseq/proj_10X_woundhealing/03_results/00-cellrange_counts/")
sm_folders <- paste0("/Users/zhuliu/Desktop/scRNA_STseq/proj_10X_woundhealing/03_results/00-cellrange_counts/", samples, "/outs/filtered_feature_bc_matrix")
sceList <- list()
for (i in seq_along(samples)) {
tmp_sms <- samples[i]
sceList[[tmp_sms]] <- CreateSeuratObject(
counts = Read10X(sm_folders[i]),
min.cells = 3, # filter this later
min.features = 200, # filter this later
project = tmp_sms)
}
# filtering by nCount and nFeatures per individual
filterCell <- function(combined){
# calculate the quantile range
count.feature.ls <- combined@meta.data[, c("nCount_RNA", "nFeature_RNA")]
count.feature.ls %<>% map(log10) %>% map(~c(10^(mean(.x) + 3*sd(.x)), 10^(mean(.x) - 3*sd(.x))))
# filter cells
combined <- subset(combined, subset = nFeature_RNA > 200 &
nFeature_RNA < count.feature.ls[[2]][1] &
nCount_RNA < count.feature.ls[[1]][1])
return(combined)
}
sceList %<>% map(filterCell)
hswound.com <- merge(sceList[[1]],
y = c(sceList[[2]], sceList[[3]], sceList[[4]], sceList[[5]], sceList[[6]],
sceList[[7]], sceList[[8]], sceList[[9]], sceList[[10]], sceList[[11]], sceList[[12]]),
add.cell.ids = samples,
project = "humanWounds")
hswound.com #
head(colnames(hswound.com)); tail(colnames(hswound.com))
#cell numbers of each sample
table(Idents(hswound.com))
rm(sceList) #remove individual seurat objectsmetadata <- readxl::read_xlsx("/Users/zhuliu/Desktop/scRNA_STseq/proj_10X_woundhealing/02_metadata/Metadata_STsc.xlsx") %>% slice(17:28)
metadata <- metadata %>% select(3:6,11) %>% mutate(User_ID = gsub("_","",User_ID)) %>% rename(ID=User_ID)
metadata <- hswound.com@meta.data %>% left_join(., metadata, by=c("orig.ident" = "ID"))
rownames(metadata) <- Cells(hswound.com)
hswound.com <- AddMetaData(hswound.com, metadata = metadata)
saveRDS(hswound.com, file = "../../00_Origin_objects/s0_original_seurat_object.rds")Original cell numbers for each sample
## An object of class Seurat
## 27973 features across 65462 samples within 1 assay
## Active assay: RNA (27973 features, 0 variable features)
##
## PWH26D0 PWH26D1 PWH26D30 PWH26D7 PWH27D0 PWH27D1 PWH27D30 PWH27D7
## 4702 5035 3692 4815 3597 7280 6605 7338
## PWH28D0 PWH28D1 PWH28D30 PWH28D7
## 5038 4973 6414 5973
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 2752904 147.1 4899647 261.7 NA 4899647 261.7
## Vcells 4716516 36.0 712318396 5434.6 32768 741703711 5658.8
hswound.com <- readRDS("../00_Origin_objects/s0_original_seurat_object.rds")
hswound.com
table(hswound.com$orig.ident)
#filter out the doublets
doublets <- list.files("/Users/zhuliu/Desktop/scRNA_STseq/proj_10X_woundhealing/03_results/01-Seurat-PreAnalysis/00_Seurat_qc_Doublets", pattern = ".txt$")
doublets.path <- paste0("../00_Seurat_qc_Doublets/", doublets)
doublets_files <- lapply(doublets.path, FUN = function(x){
data.table::fread(x)
})
names(doublets_files) <- gsub("(Shared_doublets_)|(\\.txt)", "", doublets)
doublets_com <- do.call("rbind", doublets_files) %>%
mutate(Doublet = "Doublet")
rm(doublets, doublets.path, doublets_files)
table(doublets_com$sampleID)
metadata <- hswound.com@meta.data %>% rownames_to_column(var = "mapID") %>%
left_join(., doublets_com[,c(3:4)], by=c("mapID" = "mapID")) %>%
column_to_rownames(var = "mapID") %>%
mutate(Doublet = ifelse(is.na(Doublet), "Singlet", Doublet))
hswound.com <- AddMetaData(hswound.com, metadata = metadata)
#Extract the singlet cells
hswound.com <- subset(hswound.com, subset = Doublet == "Singlet")
table(hswound.com$Doublet)
rm(metadata, doublets_com)
saveRDS(hswound.com, file = "../00_Origin_objects/s0_original_seurat_object_noDoublets.rds") Cell numbers for each sample after removing doublets
## An object of class Seurat
## 27973 features across 64056 samples within 1 assay
## Active assay: RNA (27973 features, 0 variable features)
##
## PWH26D0 PWH26D1 PWH26D30 PWH26D7 PWH27D0 PWH27D1 PWH27D30 PWH27D7
## 4570 4928 3648 4784 3534 7084 6347 7034
## PWH28D0 PWH28D1 PWH28D30 PWH28D7
## 4911 4935 6368 5913
#ratio of mitochondrial genes
hswound.com[["percent.mt"]] <- PercentageFeatureSet(object = hswound.com, pattern = "^MT-")
rownames(hswound.com)[grep("^MT-", rownames(hswound.com))]## [1] "MT-ND1" "MT-ND2" "MT-CO1" "MT-CO2" "MT-ATP8" "MT-ATP6" "MT-CO3"
## [8] "MT-ND3" "MT-ND4L" "MT-ND4" "MT-ND5" "MT-ND6" "MT-CYB"
#ratio of ribosomal genes
hswound.com[["percent.ribo"]] <- PercentageFeatureSet(object = hswound.com, pattern = "^RP[SL]")
rownames(hswound.com)[grep("^RP[SL]", rownames(hswound.com))]## [1] "RPL22" "RPL11" "RPS6KA1" "RPS8" "RPL5"
## [6] "RPS27" "RPS27AP5" "RPS6KC1" "RPS7" "RPS27A"
## [11] "RPL31" "RPL37A-DT" "RPL37A" "RPL32" "RPL15"
## [16] "RPSA" "RPL14" "RPL29" "RPL24" "RPL22L1"
## [21] "RPL39L" "RPL35A" "RPL9" "RPL34-DT" "RPL34"
## [26] "RPS3A" "RPL37" "RPS23" "RPS14" "RPL26L1"
## [31] "RPS18" "RPS10-NUDT3" "RPS10" "RPL10A" "RPL7L1"
## [36] "RPS12" "RPS6KA2" "RPS20" "RPL7" "RPL30"
## [41] "RPL8" "RPS6" "RPL35" "RPL12" "RPL7A"
## [46] "RPS24" "RPLP2" "RPL27A" "RPS13" "RPS6KA4"
## [51] "RPS6KB2" "RPS6KB2-AS1" "RPS3" "RPS25" "RPS26"
## [56] "RPL41" "RPL6" "RPLP0" "RPL21" "RPS29"
## [61] "RPL36AL" "RPS6KL1" "RPS6KA5" "RPS27L" "RPL4"
## [66] "RPLP1" "RPS17" "RPL3L" "RPS2" "RPS15A"
## [71] "RPL13" "RPL26" "RPL23A" "RPL23" "RPL19"
## [76] "RPL27" "RPS6KB1" "RPL38" "RPL17" "RPS15"
## [81] "RPL36" "RPS28" "RPL18A" "RPS16" "RPS19"
## [86] "RPL18" "RPL13A" "RPS11" "RPS9" "RPL28"
## [91] "RPS5" "RPS21" "RPL3" "RPS19BP1" "RPS6KA3"
## [96] "RPS4X" "RPS6KA6" "RPL36A" "RPL39" "RPL10"
## [101] "RPS4Y1" "RPS6KA2-IT1"
#ratio of hemoglobin genes
hswound.com[["percent.hb"]] <- PercentageFeatureSet(object = hswound.com, pattern = "^HB[^(PES)]")
rownames(hswound.com)[grep("^HB[^(PES)]", rownames(hswound.com))]## [1] "HBG2" "HBZ" "HBA2" "HBA1" "HBD" "HBQ1" "HBB" "HBM"
rownames(hswound.com)[grep("^HB", rownames(hswound.com))]## [1] "HBEGF" "HBS1L" "HBP1" "HBG2" "HBZ" "HBA2" "HBA1" "HBD" "HBQ1"
## [10] "HBB" "HBM"
#ratio of MALAT1 genes
hswound.com[["percent.malat1"]] <- PercentageFeatureSet(object = hswound.com, pattern = "MALAT1")
sd(hswound.com@meta.data$nFeature_RNA)## [1] 1765.312
d <- density(hswound.com@meta.data$nFeature_RNA)
{plot(d)
polygon(d, col="red", border="blue")}table(hswound.com@meta.data$nFeature_RNA > 500)##
## FALSE TRUE
## 1475 62581
summary(hswound.com@meta.data$nCount_RNA)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 499 10091 19094 23702 31238 294135
d <- density(hswound.com@meta.data$nCount_RNA)
{plot(d)
polygon(d, col="red", border="blue")}table(hswound.com@meta.data$nCount_RNA > 1000)##
## FALSE TRUE
## 1162 62894
table(hswound.com@meta.data$nCount_RNA > 1000 & hswound.com@meta.data$nCount_RNA < 150000)##
## FALSE TRUE
## 1224 62832
summary(hswound.com@meta.data$percent.mt)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 3.411 4.936 6.350 6.958 96.179
d <- density(hswound.com@meta.data$percent.mt)
{plot(d)
polygon(d, col="red", border="blue")}table(hswound.com@meta.data$percent.mt < 20)##
## FALSE TRUE
## 2144 61912
table(hswound.com@meta.data$percent.mt < 15)##
## FALSE TRUE
## 3213 60843
table(hswound.com@meta.data$percent.mt < 10)##
## FALSE TRUE
## 6605 57451
summary(hswound.com@meta.data$percent.ribo)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3565 19.2347 27.3616 27.0153 35.3310 79.5486
summary(hswound.com@meta.data$percent.hb)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.0028 0.0000 79.3182
VlnPlot(
hswound.com,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.ribo", "percent.hb"),
ncol = 2, pt.size=0.0001)plot1 <- FeatureScatter(hswound.com, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "orig.ident") + RotatedAxis()
plot2 <- FeatureScatter(hswound.com, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "orig.ident") + RotatedAxis()
plot3 <- FeatureScatter(hswound.com, feature1 = "nCount_RNA", feature2 = "percent.ribo", group.by = "orig.ident") + RotatedAxis()
plot4 <- FeatureScatter(hswound.com, feature1 = "nCount_RNA", feature2 = "percent.hb", group.by = "orig.ident") + RotatedAxis()
plot1 + plot2 + plot3 + plot4 + plot_layout(ncol = 2, guides = 'collect')plot5 <- FeatureScatter(hswound.com, feature1 = "percent.mt", feature2 = "percent.malat1", group.by = "orig.ident") + RotatedAxis()
plot6 <- FeatureScatter(hswound.com, feature1 = "nCount_RNA", feature2 = "percent.malat1", group.by = "orig.ident") + RotatedAxis()
plot7 <- FeatureScatter(hswound.com, feature1 = "nFeature_RNA", feature2 = "percent.malat1", group.by = "orig.ident") + RotatedAxis()
plot8 <- FeatureScatter(hswound.com, feature1 = "nFeature_RNA", feature2 = "percent.mt", group.by = "orig.ident") + RotatedAxis()
plot5 + plot6 + plot7 + plot8 + plot_layout(ncol = 2, guides = 'collect')rm(list=ls());gc()## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 3077543 164.4 4899647 261.7 NA 4899647 261.7
## Vcells 9537344 72.8 825932025 6301.4 32768 764259286 5830.9
####----Compute the top relative expression of each gene per cell Use sparse matrix----####
par(mar = c(4, 8, 2, 1))
C <- hswound.com@assays$RNA@counts
C <- Matrix::t(Matrix::t(C)/Matrix::colSums(C)) * 100
most_expressed <- order(apply(C, 1, median), decreasing = T)[20:1]
boxplot(t(as.matrix(C[most_expressed, ])), cex = 0.1, las = 1, xlab = "% total count per cell",
col = (scales::hue_pal())(20)[20:1], horizontal = TRUE)####----Fitler gene expressed in at least 10 cells (>10) -> some uninterested gene hemoglobin genes -> MALAT1 gene -> MT genes----####
length(rownames(hswound.com))
table(hswound.com$orig.ident) #sample number
selected_f <- rownames(hswound.com)[Matrix::rowSums(hswound.com@assays$RNA@counts > 0) > 10]; length(selected_f)
selected_f <- selected_f[-c(grep("^HB[^(PES)]", selected_f))]; length(selected_f)
selected_f <- selected_f[-grep("MALAT1", selected_f)]; length(selected_f)
selected_f <- selected_f[-grep("^MT-", selected_f)]; length(selected_f)
hswound.com #Check first
hswound.com <- subset(hswound.com, features = selected_f)
hswound.com #Check if it works
identical(selected_f, rownames(hswound.com))
hswound.com <- subset(hswound.com, subset = nFeature_RNA > 500 & percent.mt < 20 & nCount_RNA > 1000 & nCount_RNA < 150000)
#after filtering
hswound.com
table(hswound.com$orig.ident)
saveRDS(hswound.com, "../00_Origin_objects/s0_original_seurat_object_noDoublets_afterqc.rds")Cell numbers for each sample after quality control
## An object of class Seurat
## 25778 features across 60450 samples within 1 assay
## Active assay: RNA (25778 features, 0 variable features)
##
## PWH26D0 PWH26D1 PWH26D30 PWH26D7 PWH27D0 PWH27D1 PWH27D30 PWH27D7
## 4306 4787 3470 4603 3319 6635 6072 6764
## PWH28D0 PWH28D1 PWH28D30 PWH28D7
## 4175 4676 6002 5641
VlnPlot(
hswound.com,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.ribo"),
ncol = 2, pt.size=0)plot1 <- FeatureScatter(hswound.com, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "orig.ident") + RotatedAxis()
plot2 <- FeatureScatter(hswound.com, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "orig.ident") + RotatedAxis()
plot3 <- FeatureScatter(hswound.com, feature1 = "nCount_RNA", feature2 = "percent.ribo", group.by = "orig.ident") + RotatedAxis()
plot4 <- FeatureScatter(hswound.com, feature1 = "nCount_RNA", feature2 = "percent.hb", group.by = "orig.ident") + RotatedAxis()
plot1 + plot2 + plot3 + plot4 + plot_layout(ncol = 2, guides = 'collect')plot5 <- FeatureScatter(hswound.com, feature1 = "percent.mt", feature2 = "percent.malat1", group.by = "orig.ident") + RotatedAxis()
plot6 <- FeatureScatter(hswound.com, feature1 = "nCount_RNA", feature2 = "percent.malat1", group.by = "orig.ident") + RotatedAxis()
plot7 <- FeatureScatter(hswound.com, feature1 = "nFeature_RNA", feature2 = "percent.malat1", group.by = "orig.ident") + RotatedAxis()
plot8 <- FeatureScatter(hswound.com, feature1 = "nFeature_RNA", feature2 = "percent.mt", group.by = "orig.ident") + RotatedAxis()
plot5 + plot6 + plot7 + plot8 + plot_layout(ncol = 2, guides = 'collect')rm(list=ls());gc()## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 3075011 164.3 4899647 261.7 NA 4899647 261.7
## Vcells 9300510 71.0 792958744 6049.8 32768 764259286 5830.9
hswound.list <- SplitObject(hswound.com, split.by = "orig.ident")
####----Do normalization for each dataset----####
####----SCTransform normalization----#### These steps were done in Uppmax
####----SCTransform (default) already normalize the nUMI and nGene count----####
####----Cell cycle: removing all signal associated with cell cycle can negatively impact downstream analysis, particularly in differentiating processes (like murine hematopoiesis), where stem cells are quiescent and differentiated cells are proliferating: regressing out the difference between the G2M and S phase scores. This means that signals separating non-cycling cells and cycling cells will be maintained, but differences in cell cycle phase among proliferating cells (which are often uninteresting) will be regressed out of the data----####
hswound.list <- lapply(X = hswound.list, FUN = function(x) {
x <- NormalizeData(x)
x <- CellCycleScoring(object = x, g2m.features = cc.genes$g2m.genes, s.features = cc.genes$s.genes, set.ident = FALSE)
x$CC.Difference <- x$S.Score - x$G2M.Score
x <- SCTransform(x, vars.to.regress = c("percent.mt", "CC.Difference"), variable.features.n = 4000, verbose = TRUE, return.only.var.genes = FALSE, method = "glmGamPoi")
x <- RunPCA(x, npcs = 60, ndims.print = 1:5, nfeatures.print = 5, features = VariableFeatures(object = x))
})
rm(hswound.com);gc() #delete the object to save RAM memory
####----Save the individual seurat object----####
for (i in 1:length(hswound.list)) {
saveRDS(hswound.list[[i]], file = paste0("s1_cleanSeurat_",names(hswound.list[i]), ".rds"))
}samples <- list.files("../s0_SeuratObject_SplitPerSample/", pattern = ".rds$")
sm_folders <- paste0("../s0_SeuratObject_SplitPerSample/", samples)
samplenames <- gsub("s1_cleanSeurat_", "", samples)
samplenames <- gsub("\\.rds", "", samplenames)
hswound.list <- list()
for (i in seq_along(samples)) {
tmp_sms <- samplenames[i]
hswound.list[[tmp_sms]] <- readRDS(sm_folders[i])
}
hswound.list## $PWH26D0
## An object of class Seurat
## 47643 features across 4306 samples within 2 assays
## Active assay: SCT (21865 features, 4000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## $PWH26D1
## An object of class Seurat
## 47277 features across 4787 samples within 2 assays
## Active assay: SCT (21499 features, 4000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## $PWH26D30
## An object of class Seurat
## 47849 features across 3470 samples within 2 assays
## Active assay: SCT (22071 features, 4000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## $PWH26D7
## An object of class Seurat
## 49028 features across 4603 samples within 2 assays
## Active assay: SCT (23250 features, 4000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## $PWH27D0
## An object of class Seurat
## 47020 features across 3319 samples within 2 assays
## Active assay: SCT (21242 features, 4000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## $PWH27D1
## An object of class Seurat
## 47353 features across 6635 samples within 2 assays
## Active assay: SCT (21575 features, 4000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## $PWH27D30
## An object of class Seurat
## 48220 features across 6072 samples within 2 assays
## Active assay: SCT (22442 features, 4000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## $PWH27D7
## An object of class Seurat
## 49056 features across 6764 samples within 2 assays
## Active assay: SCT (23278 features, 4000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## $PWH28D0
## An object of class Seurat
## 47676 features across 4175 samples within 2 assays
## Active assay: SCT (21898 features, 4000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## $PWH28D1
## An object of class Seurat
## 47013 features across 4676 samples within 2 assays
## Active assay: SCT (21235 features, 4000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## $PWH28D30
## An object of class Seurat
## 48189 features across 6002 samples within 2 assays
## Active assay: SCT (22411 features, 4000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## $PWH28D7
## An object of class Seurat
## 48816 features across 5641 samples within 2 assays
## Active assay: SCT (23038 features, 4000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
Something need to think about it. (Variable genes across samples/conditions)
####----Check variable genes for each sample----####
vg2 <- lapply(hswound.list, function(x) VariableFeatures(x)[1:3000])
vg.all <- unique(unlist(vg2)) #unique top 3k variable genes among all samples
rn <- lapply(hswound.list, rownames) #total number of genes expressed in each sample
nE <- colSums(Reduce(rbind, lapply(rn, function(x) vg.all %in% x))) #Check the overlapping number of variable genes across samples
vg.all <- vg.all[nE == length(hswound.list)] #length(hswound.list) the number of samples
setdiff(features.all, vg.all) #features.all is from SelectIntegrationFeatures function directly (below). PS: no big difference if I didn't do any further filtering even though genes not expressed across all samples####----Integration with SCTransform----####
####----First select top variable genes----####
features.all <- SelectIntegrationFeatures(object.list = hswound.list, nfeatures = 4500) #First select 4.5K genes
rn <- lapply(hswound.list, rownames)
nE <- colSums(Reduce(rbind, lapply(rn, function(x) features.all %in% x)))
features.all <- features.all[nE == length(hswound.list)][1:4000] #Keep only 4K variable genes expressed in all samples
eachvg <- list(
PWH26D0=VariableFeatures(hswound.list$PWH26D0),
PWH26D1=VariableFeatures(hswound.list$PWH26D1),
PWH26D30=VariableFeatures(hswound.list$PWH26D30),
PWH26D7=VariableFeatures(hswound.list$PWH26D7),
PWH27D0=VariableFeatures(hswound.list$PWH27D0),
PWH27D1=VariableFeatures(hswound.list$PWH27D1),
PWH27D30=VariableFeatures(hswound.list$PWH27D30),
PWH27D7=VariableFeatures(hswound.list$PWH27D7),
PWH28D0=VariableFeatures(hswound.list$PWH28D0),
PWH28D1=VariableFeatures(hswound.list$PWH28D1),
PWH28D30=VariableFeatures(hswound.list$PWH28D30),
PWH28D7=VariableFeatures(hswound.list$PWH28D7),
Allvg=features.all)
source("../Functions/overlap_phyper2.R")
overlap_phyper2(eachvg, eachvg)## $P
## PWH26D0 PWH26D1 PWH26D30 PWH26D7 PWH27D0
## PWH26D0 0.000000e+00 0.000000e+00 0.000000e+00 0 0.000000e+00
## PWH26D1 0.000000e+00 0.000000e+00 0.000000e+00 0 0.000000e+00
## PWH26D30 0.000000e+00 0.000000e+00 0.000000e+00 0 0.000000e+00
## PWH26D7 0.000000e+00 0.000000e+00 0.000000e+00 0 0.000000e+00
## PWH27D0 0.000000e+00 0.000000e+00 0.000000e+00 0 0.000000e+00
## PWH27D1 0.000000e+00 0.000000e+00 0.000000e+00 0 0.000000e+00
## PWH27D30 0.000000e+00 0.000000e+00 0.000000e+00 0 0.000000e+00
## PWH27D7 5.486432e-268 3.921443e-309 0.000000e+00 0 0.000000e+00
## PWH28D0 0.000000e+00 0.000000e+00 0.000000e+00 0 0.000000e+00
## PWH28D1 0.000000e+00 0.000000e+00 1.996025e-321 0 0.000000e+00
## PWH28D30 8.199847e-304 1.806570e-264 0.000000e+00 0 1.435086e-304
## PWH28D7 0.000000e+00 0.000000e+00 0.000000e+00 0 0.000000e+00
## Allvg 0.000000e+00 0.000000e+00 0.000000e+00 0 0.000000e+00
## NA NA NA NA NA
## PWH27D1 PWH27D30 PWH27D7 PWH28D0 PWH28D1
## PWH26D0 0.000000e+00 0.000000e+00 5.486432e-268 0.000000e+00 0.000000e+00
## PWH26D1 0.000000e+00 0.000000e+00 3.921443e-309 0.000000e+00 0.000000e+00
## PWH26D30 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.996025e-321
## PWH26D7 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## PWH27D0 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## PWH27D1 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## PWH27D30 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.984499e-311
## PWH27D7 0.000000e+00 0.000000e+00 0.000000e+00 2.505588e-305 2.657925e-302
## PWH28D0 0.000000e+00 0.000000e+00 2.505588e-305 0.000000e+00 0.000000e+00
## PWH28D1 0.000000e+00 1.984499e-311 2.657925e-302 0.000000e+00 0.000000e+00
## PWH28D30 3.178838e-243 0.000000e+00 0.000000e+00 0.000000e+00 3.079424e-245
## PWH28D7 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## Allvg 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## NA NA NA NA NA
## PWH28D30 PWH28D7 Allvg
## PWH26D0 8.199847e-304 0 0 NA
## PWH26D1 1.806570e-264 0 0 NA
## PWH26D30 0.000000e+00 0 0 NA
## PWH26D7 0.000000e+00 0 0 NA
## PWH27D0 1.435086e-304 0 0 NA
## PWH27D1 3.178838e-243 0 0 NA
## PWH27D30 0.000000e+00 0 0 NA
## PWH27D7 0.000000e+00 0 0 NA
## PWH28D0 0.000000e+00 0 0 NA
## PWH28D1 3.079424e-245 0 0 NA
## PWH28D30 0.000000e+00 0 0 NA
## PWH28D7 0.000000e+00 0 0 NA
## Allvg 0.000000e+00 0 0 NA
## NA NA NA NA
##
## $M
## PWH26D0 PWH26D1 PWH26D30 PWH26D7 PWH27D0 PWH27D1 PWH27D30 PWH27D7
## PWH26D0 4000 2982 2964 2912 3260 2916 2854 2767
## PWH26D1 2982 4000 2918 2933 3024 3343 2864 2823
## PWH26D30 2964 2918 4000 3097 2937 2861 3172 2995
## PWH26D7 2912 2933 3097 4000 2913 2914 3016 3209
## PWH27D0 3260 3024 2937 2913 4000 3055 2939 2862
## PWH27D1 2916 3343 2861 2914 3055 4000 2874 2886
## PWH27D30 2854 2864 3172 3016 2939 2874 4000 3040
## PWH27D7 2767 2823 2995 3209 2862 2886 3040 4000
## PWH28D0 3214 2923 2966 2889 3197 2918 2894 2818
## PWH28D1 2900 3344 2839 2869 2981 3347 2826 2814
## PWH28D30 2816 2762 3195 2979 2817 2731 3190 2983
## PWH28D7 2904 2952 3067 3198 2919 2998 3016 3178
## Allvg 3229 3288 3209 3191 3318 3279 3193 3117
## nTotal2 4000 4000 4000 4000 4000 4000 4000 4000
## PWH28D0 PWH28D1 PWH28D30 PWH28D7 Allvg nTotal1
## PWH26D0 3214 2900 2816 2904 3229 4000
## PWH26D1 2923 3344 2762 2952 3288 4000
## PWH26D30 2966 2839 3195 3067 3209 4000
## PWH26D7 2889 2869 2979 3198 3191 4000
## PWH27D0 3197 2981 2817 2919 3318 4000
## PWH27D1 2918 3347 2731 2998 3279 4000
## PWH27D30 2894 2826 3190 3016 3193 4000
## PWH27D7 2818 2814 2983 3178 3117 4000
## PWH28D0 4000 2913 2880 2959 3217 4000
## PWH28D1 2913 4000 2734 2944 3251 4000
## PWH28D30 2880 2734 4000 3032 3103 4000
## PWH28D7 2959 2944 3032 4000 3202 4000
## Allvg 3217 3251 3103 3202 4000 4000
## nTotal2 4000 4000 4000 4000 4000 8025
##
## $plot
####----Prepare the integration----####
hswound.list <- PrepSCTIntegration(object.list = hswound.list, anchor.features = features.all)
hswound.combined.sct <- merge(hswound.list[[1]],
y = hswound.list[2:length(hswound.list)],
project = "humanwound",
merge.data = TRUE)
hswound.combined.sct## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 3005966 160.6 4899647 261.7 NA 4899647 261.7
## Vcells 5952144 45.5 3145689968 23999.8 32768 2889092333 22042.1
VariableFeatures(hswound.combined.sct) <- features.all
hswound.combined.sct <- RunPCA(object = hswound.combined.sct, assay = "SCT", npcs = 60, features = VariableFeatures(hswound.combined.sct))table(hswound.combined.sct$orig.ident)##
## PWH26D0 PWH26D1 PWH26D7 PWH26D30 PWH27D0 PWH27D1 PWH27D7 PWH27D30
## 4306 4787 4603 3470 3319 6635 6764 6072
## PWH28D0 PWH28D1 PWH28D7 PWH28D30
## 4175 4676 5641 6002
ElbowPlot(hswound.combined.sct, ndims = 60, reduction = "pca")####----Check the genes contributing to top PCs----####
#If there is any gene related to the ribosomal genes (^RP[S|L]),
VizDimLoadings(hswound.combined.sct, dims = 1:12, reduction = "pca")hswound.combined.sct <- RunHarmony(object = hswound.combined.sct,
assay.use = "SCT",
reduction = "pca",
dims.use = 1:40,
group.by.vars = "orig.ident",
plot_convergence = TRUE)
#rn pca, umap, clustering
hswound.combined.sct <- RunUMAP(object = hswound.combined.sct, assay = "SCT", reduction = "harmony", dims = 1:40)
hswound.combined.sct <- FindNeighbors(object = hswound.combined.sct, assay = "SCT", reduction = "harmony", dims = 1:40, k.param = 40)
hswound.combined.sct <- FindClusters(object = hswound.combined.sct, resolution = 0.5)
hswound.combined.sct <- FindClusters(object = hswound.combined.sct, resolution = 0.8)
hswound.combined.sct$orig.ident <- factor(x = hswound.combined.sct$orig.ident, levels =
c("PWH26D0", "PWH26D1", "PWH26D7","PWH26D30",
"PWH27D0", "PWH27D1", "PWH27D7","PWH27D30",
"PWH28D0", "PWH28D1", "PWH28D7","PWH28D30"))
hswound.combined.sct$Condition <- factor(x = hswound.combined.sct$Condition, levels =
c("Skin", "Wound1", "Wound7","Wound30"))
saveRDS(hswound.combined.sct, file = "s1_cleanSeurat_harmony_allSamples_clusters.rds")DimPlot(hswound.combined.sct, reduction = "umap", group.by='seurat_clusters', label=TRUE) +
ggtitle('Cluster')DimPlot(hswound.combined.sct, reduction = "umap", group.by='orig.ident', label=FALSE) +
ggtitle('Samples')DimPlot(hswound.combined.sct, reduction = "umap", group.by = "seurat_clusters", pt.size = .001, split.by = 'orig.ident', ncol = 4, label = TRUE) + NoLegend()DimPlot(hswound.combined.sct, reduction = "umap", group.by='Condition', label=FALSE) +
ggtitle('Groups')DimPlot(hswound.combined.sct, reduction = "umap", group.by = "seurat_clusters", pt.size = .001, split.by = 'Condition', ncol = 4, label = TRUE) + NoLegend()DimPlot(hswound.combined.sct, reduction = "umap", group.by='Age', label=FALSE) +
ggtitle('Age')DimPlot(hswound.combined.sct, reduction = "umap", group.by='Gender', label=FALSE) +
ggtitle('Gender')# Explore whether clusters segregate by cell cycle phase
DimPlot(hswound.combined.sct,
label = TRUE,
split.by = "Phase") + NoLegend()(DimPlot(hswound.combined.sct, reduction = "umap", group.by='seurat_clusters', label=TRUE) + ggtitle('Clusters') + NoAxes() + NoLegend()) +
(FeaturePlot(hswound.combined.sct, features = c("MKI67"), reduction = "umap", cols = c("gray","red")) + NoLegend() + NoAxes())# Determine metrics to plot present in hswound.combined.sct@meta.data
metrics <- c("nCount_RNA", "nFeature_RNA", "S.Score", "G2M.Score", "percent.mt")
FeaturePlot(hswound.combined.sct,
reduction = "umap",
features = metrics,
pt.size = 0.001,
label = FALSE)####----Please pay attentions to the original numbers of cells per sample----####
clusters <- unique(hswound.combined.sct@meta.data[["seurat_clusters"]])
clusters <- clusters[order(clusters)]
df <- data.frame()
for(i in 1:length(clusters)){
SmCell_sum <- table(hswound.combined.sct$orig.ident)
tmp.df1 <- hswound.combined.sct@meta.data %>% subset(seurat_clusters == clusters[i]) %>% select(orig.ident) %>% table()
if(length(tmp.df1) == 12){
#First normalize the total cell counts per sample
cur_df <- as.data.frame(tmp.df1 / SmCell_sum)
colnames(cur_df) <- c("Sample", "Freq")
#Calculate the normalized proportion
cur_df$Freq <- cur_df$Freq * 1/(sum(cur_df$Freq))
cur_df$Cluster <- clusters[i]
df <- rbind(df, cur_df)
} else {
#print(i);print(tmp.df1)
#only include the matched samples
match.sample <- SmCell_sum[names(SmCell_sum) %in% names(tmp.df1)]
#print(match.sample)
#First normalize the total cell counts per sample
cur_df <- as.data.frame(tmp.df1 / match.sample)
colnames(cur_df) <- c("Sample", "Freq")
#Calculate the normalized proportion
cur_df$Freq <- cur_df$Freq * 1/(sum(cur_df$Freq))
cur_df$Cluster <- clusters[i]
df <- rbind(df, cur_df)
}
}
##Plot for each sample
ggplot(df, aes(x = Cluster, y = Freq, fill = Sample)) +
geom_bar(position = "stack", stat = "identity") +
scale_fill_brewer(palette = "Paired") + # Paired or Set3 maximum variable = 12
xlab('Clsuter') +
scale_y_continuous(breaks = seq(0, 1, 0.1),
expand = c(0, 0),
name = 'Percentage') +
theme_bw() +
theme(
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_blank(),
#panel.border = element_rect(size = 0.35),
axis.ticks = element_blank()#,
#axis.text.x = element_text(size = 5)
) +
coord_flip()##Plot for each group
df.group <- df %>% mutate(Sample = gsub("PWH..", "", Sample)) %>% group_by(Cluster, Sample) %>%
summarise(Freq = sum(Freq))## `summarise()` has grouped output by 'Cluster'. You can override using the `.groups` argument.
df.group$Sample <- factor(df.group$Sample, levels = c("D0", "D1", "D7", "D30"))
ggplot(df.group, aes(x = Cluster, y = Freq, fill = Sample)) +
geom_bar(position = "stack", stat = "identity") +
scale_fill_brewer(palette = "Set1") + # Paired or Set3 maximum variable = 12
xlab('Clsuter') +
scale_y_continuous(breaks = seq(0, 1, 0.1),
expand = c(0, 0),
name = 'Percentage') +
theme_bw() +
theme(
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_blank(),
#panel.border = element_rect(size = 0.35),
axis.ticks = element_blank()#,
#axis.text.x = element_text(size = 5)
) +
coord_flip()# Clustering with louvain (algorithm 1) using different resolutions
for (res in c(0.2, 0.4, 0.5, 0.6, 0.8, 1.0, 1.2, 1.5, 2)) {
hswound.combined.sct <- FindClusters(hswound.combined.sct, resolution = res, verbose = FALSE)
}
plot_grid(ncol = 2,
DimPlot(hswound.combined.sct, reduction = "umap", group.by = "SCT_snn_res.0.2") +
ggtitle("louvain_0.2"),
DimPlot(hswound.combined.sct, reduction = "umap", group.by = "SCT_snn_res.0.4") +
ggtitle("louvain_0.4"),
DimPlot(hswound.combined.sct, reduction = "umap", group.by = "SCT_snn_res.0.5") +
ggtitle("louvain_0.5"),
DimPlot(hswound.combined.sct, reduction = "umap", group.by = "SCT_snn_res.0.6") +
ggtitle("louvain_0.6"),
DimPlot(hswound.combined.sct, reduction = "umap", group.by = "SCT_snn_res.0.8") +
ggtitle("louvain_0.8"),
DimPlot(hswound.combined.sct, reduction = "umap", group.by = "SCT_snn_res.1") +
ggtitle("louvain_1"),
DimPlot(hswound.combined.sct, reduction = "umap", group.by = "SCT_snn_res.1.2") +
ggtitle("louvain_1.2"),
DimPlot(hswound.combined.sct, reduction = "umap", group.by = "SCT_snn_res.1.5") +
ggtitle("louvain_1.5"),
DimPlot(hswound.combined.sct, reduction = "umap", group.by = "SCT_snn_res.2") +
ggtitle("louvain_2"))# install.packages('clustree')
suppressPackageStartupMessages(library(clustree))
clustree(hswound.combined.sct@meta.data, prefix = "SCT_snn_res.")## Warning: The `add` argument of `group_by()` is deprecated as of dplyr 1.0.0.
## Please use the `.add` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 3215233 171.8 9023238 481.9 NA 9023238 481.9
## Vcells 8409359 64.2 2516551975 19199.8 32768 3145681718 23999.7
####----Change to RNA assay to do the DE analysis----####
DefaultAssay(hswound.combined.sct) <- "RNA" #DE for all genes instead of only using variable gene
hswound.combined.sct
hswound.combined.sct <- NormalizeData(hswound.combined.sct, verbose = TRUE, normalization.method = "LogNormalize", scale.factor = 10000)
####----Calculate the scale data for following heatmap visualization of marker genes----####
hswound.combined.sct <- ScaleData(object = hswound.combined.sct, features = rownames(hswound.combined.sct), vars.to.regress = c("percent.mt", "CC.Difference", "nCount_RNA", "nFeature_RNA"))
#save data
saveRDS(hswound.combined.sct, "s2_cleanSeurat_harmony_allSamples_clusters_normGenes.rds") #save normalized gene expressions
Idents(hswound.combined.sct) <- "SCT_snn_res.0.8"
table(hswound.combined.sct$SCT_snn_res.0.8)
####----FindAllMarkers for all clusters----####
markers <- FindAllMarkers(
hswound.combined.sct,
only.pos = TRUE,
min.pct = 0.25, #min.pct = 0.5, logfc.threshold = 0.5,
logfc.threshold = 0.25,
test.use = "MAST") #MAST has good FDR control and is faster than DESeq2, default test is wilcox
####----add the gene description to the cluster marker genes----####
require(biomaRt) #listMarts() : Check the version and dataset
hs.ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl") #listAttributes(mart = hs.ensembl)
annot <- getBM(attributes = c("external_gene_name", "entrezgene_id", "gene_biotype","description"), mart=hs.ensembl) %>%
distinct(external_gene_name, .keep_all = TRUE) %>% tidyr::separate(description, c("Gene_anno", "Other"), sep="\\[Source") %>%
dplyr::select(-5)
annot <- readRDS("../Functions/humanAnnotation.rds")
####----add the info and reorder the columns----####
markers <- markers %>% left_join(., annot, by=c("gene" = "external_gene_name")) %>%
dplyr::select(6, 7, 2:4, 1, 5, everything()) %>% arrange(cluster, desc(avg_log2FC))
data.table::fwrite(markers, file="s2_cleanSeurat_harmony_allSamples_markerGene_ori.txt", sep="\t")hswound.combined.sct <- readRDS(file = "s2_cleanSeurat_harmony_allSamples_clusters_normGenes.rds") #reload the clustering results with normalized expressions for further visualization
hswound.combined.sct## An object of class Seurat
## 51179 features across 60450 samples within 2 assays
## Active assay: RNA (25778 features, 0 variable features)
## 1 other assay present: SCT
## 3 dimensional reductions calculated: pca, harmony, umap
variable.genes <- VariableFeatures(hswound.combined.sct, assay="SCT") %>% as.data.frame() %>% rename("gene" = ".") %>%
mutate(VariableGene = "Yes")
markers <- data.table::fread("s2_cleanSeurat_harmony_allSamples_markerGene_ori.txt") #marker genes
####----Add the information about if the gene is the variable gene----####
markers <- markers %>% left_join(., variable.genes, by=c("gene" = "gene"))top5 <- markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
####----Heatmap plot----####
DoHeatmap(hswound.combined.sct, features = as.character(unique(top5$gene)), group.by='SCT_snn_res.0.8', label=TRUE, size = 2, angle = 30, disp.min = -2.5)####----Violin plot----####
top1 <- markers %>% group_by(cluster) %>% top_n(n = 1, wt = avg_log2FC)
VlnPlot(hswound.combined.sct, features = top1$gene, group.by='SCT_snn_res.0.8', ncol = 2, pt.size = 0) # set up list of canonical cell type markers
canonical_markers <- list(
'Keratinocytes' = c('KRT5', 'KRT14', 'KRT1', 'KRT10'),
'Fibroblasts' = c('COL1A1', 'COL1A2', 'DCN'),
'Melanocyte' = c('PMEL', 'MLANA'),
'Endothelial-cells' = c('PECAM1', 'VWF'),
'T-cells' = c('CD52', 'CD3D'),
'Dendritic-cells' = c('CD74')
)
plot_list <- FeaturePlot(
hswound.combined.sct,
features=unlist(canonical_markers),
combine=FALSE, cols = c("gray","red")
)
# apply theme to each feature plot
for(i in 1:length(plot_list)){
plot_list[[i]] <- plot_list[[i]] + NoLegend() + NoAxes()
}
cowplot::plot_grid(plotlist = plot_list, ncol = 4)rm(plot_list);gc(verbose = FALSE)## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 3351975 179.1 9023238 481.9 NA 9023238 481.9
## Vcells 3216676581 24541.3 3820091770 29145.0 32768 3820091770 29145.0
library(SingleR)## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:SeuratObject':
##
## Assays
## The following object is masked from 'package:Seurat':
##
## Assays
library(celldex)##
## Attaching package: 'celldex'
## The following objects are masked from 'package:SingleR':
##
## BlueprintEncodeData, DatabaseImmuneCellExpressionData,
## HumanPrimaryCellAtlasData, ImmGenData, MonacoImmuneData,
## MouseRNAseqData, NovershternHematopoieticData
hpca.se <- HumanPrimaryCellAtlasData()## snapshotDate(): 2021-05-18
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
hpca.se$label.main <- str_replace_all(hpca.se$label.main,
c("NK_cell" = "NK cells", "B_cell" = "B cells",
"DC" = "Dendritic cells","HSC_-G-CSF" = "HSC G-CSF",
"Monocyte" = "Monocytes", "Astrocyte" = "Astrocytes",
"Erythroblast" = "Erythroblasts", "Macrophage" = "Macrophages",
"BM" = "BMs", "MSC"="MSCs",
"CMP" = "CMPs", "GMP" = "GMPs",
"MEP"="MEPs", "Myelocyte"="Myelocytes",
"Neuroepithelial_cell"="Neuroepithelial cells", "T_cells" = "T cells",
"iPS_cells" = "iPS cells", "Endothelial_cells" = "Endothelial cells",
"Tissue_stem_cells" = "Tissue stem cells", "Embryonic_stem_cells" = "Embryonic stem cells",
"Smooth_muscle_cells" = "Smooth muscle cells", "Epithelial_cells" = "Epithelial cells"))
hpca.se$label.main[hpca.se$label.main == "HSC_CD34+"] <-"HSC CD34"
hpca.se$label.main[hpca.se$label.main == "Pro-B cells_CD34+"] <-"Pro-B cells CD34 Pos"
hpca.se$label.main[hpca.se$label.main == "Pre-B cells_CD34-"] <-"Pre-B cells CD34 Neg"
####----Check the unique cell types----####
hpca.se$label.main %>% unique()## [1] "Dendritic cells" "Smooth muscle cells" "Epithelial cells"
## [4] "B cells" "Neutrophils" "T cells"
## [7] "Monocytes" "Erythroblasts" "BMs & Prog."
## [10] "Endothelial cells" "Gametocytes" "Neurons"
## [13] "Keratinocytes" "HSC G-CSF" "Macrophages"
## [16] "NK cells" "Embryonic stem cells" "Tissue stem cells"
## [19] "Chondrocytes" "Osteoblasts" "BMs"
## [22] "Platelets" "Fibroblasts" "iPS cells"
## [25] "Hepatocytes" "MSCs" "Neuroepithelial cells"
## [28] "Astrocytes" "HSC CD34" "CMPs"
## [31] "GMPs" "MEPs" "Myelocytes"
## [34] "Pre-B cells CD34 Neg" "Pro-B cells CD34 Pos" "Pro-Myelocytes"
#Get the count data from Seurat Object
hswound.singleR <- GetAssayData(hswound.combined.sct)
##First check the main cell type
Idents(hswound.combined.sct) <- "SCT_snn_res.0.8" #Choose different resolutions to annotate
pred.hesc <- SingleR(test = hswound.singleR, ref = hpca.se, assay.type.test=1, labels = hpca.se$label.main, clusters = Idents(hswound.combined.sct))
##Plot of main cell type
plotScoreHeatmap(pred.hesc)celltype.main <- as.data.frame(pred.hesc) %>% tibble::rownames_to_column(var = "Cluster")
celltype.main.score <- as.data.frame(pred.hesc) %>% t() %>% as.data.frame() %>% tibble::rownames_to_column(var = "Cluster") %>% mutate(SingleR = "MainCellType")
celltype.main$Cluster <- parse_integer(celltype.main$Cluster)
markers <- markers %>% left_join(., celltype.main[, c(1,38)], by=c("cluster" = "Cluster")) #Choose the first.label
####----Check the unique fine cell types----####
hpca.se$label.fine %>% unique() #sub cell types [which represents the highest resolution]## [1] "DC:monocyte-derived:immature"
## [2] "DC:monocyte-derived:Galectin-1"
## [3] "DC:monocyte-derived:LPS"
## [4] "DC:monocyte-derived"
## [5] "Smooth_muscle_cells:bronchial:vit_D"
## [6] "Smooth_muscle_cells:bronchial"
## [7] "Epithelial_cells:bronchial"
## [8] "B_cell"
## [9] "Neutrophil"
## [10] "T_cell:CD8+_Central_memory"
## [11] "T_cell:CD8+"
## [12] "T_cell:CD4+"
## [13] "T_cell:CD8+_effector_memory_RA"
## [14] "T_cell:CD8+_effector_memory"
## [15] "T_cell:CD8+_naive"
## [16] "Monocyte"
## [17] "Erythroblast"
## [18] "BM"
## [19] "DC:monocyte-derived:rosiglitazone"
## [20] "DC:monocyte-derived:AM580"
## [21] "DC:monocyte-derived:rosiglitazone/AGN193109"
## [22] "DC:monocyte-derived:anti-DC-SIGN_2h"
## [23] "Endothelial_cells:HUVEC"
## [24] "Endothelial_cells:HUVEC:Borrelia_burgdorferi"
## [25] "Endothelial_cells:HUVEC:IFNg"
## [26] "Endothelial_cells:lymphatic"
## [27] "Endothelial_cells:HUVEC:Serum_Amyloid_A"
## [28] "Endothelial_cells:lymphatic:TNFa_48h"
## [29] "T_cell:effector"
## [30] "T_cell:CCR10+CLA+1,25(OH)2_vit_D3/IL-12"
## [31] "T_cell:CCR10-CLA+1,25(OH)2_vit_D3/IL-12"
## [32] "Gametocytes:spermatocyte"
## [33] "DC:monocyte-derived:A._fumigatus_germ_tubes_6h"
## [34] "Neurons:ES_cell-derived_neural_precursor"
## [35] "Keratinocytes"
## [36] "Keratinocytes:IL19"
## [37] "Keratinocytes:IL20"
## [38] "Keratinocytes:IL22"
## [39] "Keratinocytes:IL24"
## [40] "Keratinocytes:IL26"
## [41] "Keratinocytes:KGF"
## [42] "Keratinocytes:IFNg"
## [43] "Keratinocytes:IL1b"
## [44] "HSC_-G-CSF"
## [45] "DC:monocyte-derived:mature"
## [46] "Monocyte:anti-FcgRIIB"
## [47] "Macrophage:monocyte-derived:IL-4/cntrl"
## [48] "Macrophage:monocyte-derived:IL-4/Dex/cntrl"
## [49] "Macrophage:monocyte-derived:IL-4/Dex/TGFb"
## [50] "Macrophage:monocyte-derived:IL-4/TGFb"
## [51] "Monocyte:leukotriene_D4"
## [52] "NK_cell"
## [53] "NK_cell:IL2"
## [54] "Embryonic_stem_cells"
## [55] "Tissue_stem_cells:iliac_MSC"
## [56] "Chondrocytes:MSC-derived"
## [57] "Osteoblasts"
## [58] "Tissue_stem_cells:BM_MSC"
## [59] "Osteoblasts:BMP2"
## [60] "Tissue_stem_cells:BM_MSC:BMP2"
## [61] "Tissue_stem_cells:BM_MSC:TGFb3"
## [62] "DC:monocyte-derived:Poly(IC)"
## [63] "DC:monocyte-derived:CD40L"
## [64] "DC:monocyte-derived:Schuler_treatment"
## [65] "DC:monocyte-derived:antiCD40/VAF347"
## [66] "Tissue_stem_cells:dental_pulp"
## [67] "T_cell:CD4+_central_memory"
## [68] "T_cell:CD4+_effector_memory"
## [69] "T_cell:CD4+_Naive"
## [70] "Smooth_muscle_cells:vascular"
## [71] "Smooth_muscle_cells:vascular:IL-17"
## [72] "Platelets"
## [73] "Epithelial_cells:bladder"
## [74] "Macrophage:monocyte-derived"
## [75] "Macrophage:monocyte-derived:M-CSF"
## [76] "Macrophage:monocyte-derived:M-CSF/IFNg"
## [77] "Macrophage:monocyte-derived:M-CSF/Pam3Cys"
## [78] "Macrophage:monocyte-derived:M-CSF/IFNg/Pam3Cys"
## [79] "Macrophage:monocyte-derived:IFNa"
## [80] "Gametocytes:oocyte"
## [81] "Monocyte:F._tularensis_novicida"
## [82] "Endothelial_cells:HUVEC:B._anthracis_LT"
## [83] "B_cell:Germinal_center"
## [84] "B_cell:Plasma_cell"
## [85] "B_cell:Naive"
## [86] "B_cell:Memory"
## [87] "DC:monocyte-derived:AEC-conditioned"
## [88] "Tissue_stem_cells:lipoma-derived_MSC"
## [89] "Tissue_stem_cells:adipose-derived_MSC_AM3"
## [90] "Endothelial_cells:HUVEC:FPV-infected"
## [91] "Endothelial_cells:HUVEC:PR8-infected"
## [92] "Endothelial_cells:HUVEC:H5N1-infected"
## [93] "Macrophage:monocyte-derived:S._aureus"
## [94] "Fibroblasts:foreskin"
## [95] "iPS_cells:skin_fibroblast-derived"
## [96] "iPS_cells:skin_fibroblast"
## [97] "T_cell:gamma-delta"
## [98] "Monocyte:CD14+"
## [99] "Macrophage:Alveolar"
## [100] "Macrophage:Alveolar:B._anthacis_spores"
## [101] "Neutrophil:inflam"
## [102] "iPS_cells:PDB_fibroblasts"
## [103] "iPS_cells:PDB_1lox-17Puro-5"
## [104] "iPS_cells:PDB_1lox-17Puro-10"
## [105] "iPS_cells:PDB_1lox-21Puro-20"
## [106] "iPS_cells:PDB_1lox-21Puro-26"
## [107] "iPS_cells:PDB_2lox-5"
## [108] "iPS_cells:PDB_2lox-22"
## [109] "iPS_cells:PDB_2lox-21"
## [110] "iPS_cells:PDB_2lox-17"
## [111] "iPS_cells:CRL2097_foreskin"
## [112] "iPS_cells:CRL2097_foreskin-derived:d20_hepatic_diff"
## [113] "iPS_cells:CRL2097_foreskin-derived:undiff."
## [114] "B_cell:CXCR4+_centroblast"
## [115] "B_cell:CXCR4-_centrocyte"
## [116] "Endothelial_cells:HUVEC:VEGF"
## [117] "iPS_cells:fibroblasts"
## [118] "iPS_cells:fibroblast-derived:Direct_del._reprog"
## [119] "iPS_cells:fibroblast-derived:Retroviral_transf"
## [120] "Endothelial_cells:lymphatic:KSHV"
## [121] "Endothelial_cells:blood_vessel"
## [122] "Monocyte:CD16-"
## [123] "Monocyte:CD16+"
## [124] "Tissue_stem_cells:BM_MSC:osteogenic"
## [125] "Hepatocytes"
## [126] "Neutrophil:uropathogenic_E._coli_UTI89"
## [127] "Neutrophil:commensal_E._coli_MG1655"
## [128] "MSC"
## [129] "Neuroepithelial_cell:ESC-derived"
## [130] "Astrocyte:Embryonic_stem_cell-derived"
## [131] "Endothelial_cells:HUVEC:IL-1b"
## [132] "HSC_CD34+"
## [133] "CMP"
## [134] "GMP"
## [135] "B_cell:immature"
## [136] "MEP"
## [137] "Myelocyte"
## [138] "Pre-B_cell_CD34-"
## [139] "Pro-B_cell_CD34+"
## [140] "Pro-Myelocyte"
## [141] "Smooth_muscle_cells:umbilical_vein"
## [142] "iPS_cells:foreskin_fibrobasts"
## [143] "iPS_cells:iPS:minicircle-derived"
## [144] "iPS_cells:adipose_stem_cells"
## [145] "iPS_cells:adipose_stem_cell-derived:lentiviral"
## [146] "iPS_cells:adipose_stem_cell-derived:minicircle-derived"
## [147] "Fibroblasts:breast"
## [148] "Monocyte:MCSF"
## [149] "Monocyte:CXCL4"
## [150] "Neurons:adrenal_medulla_cell_line"
## [151] "Tissue_stem_cells:CD326-CD56+"
## [152] "NK_cell:CD56hiCD62L+"
## [153] "T_cell:Treg:Naive"
## [154] "Neutrophil:LPS"
## [155] "Neutrophil:GM-CSF_IFNg"
## [156] "Monocyte:S._typhimurium_flagellin"
## [157] "Neurons:Schwann_cell"
##Second check the fine cell type
pred.hesc.fine <- SingleR(test = hswound.singleR, ref = hpca.se, assay.type.test=1, labels = hpca.se$label.fine, clusters = Idents(hswound.combined.sct))
##Plot of fine cell type
plotScoreHeatmap(pred.hesc.fine)celltype.fine <- as.data.frame(pred.hesc.fine) %>% tibble::rownames_to_column(var = "Cluster")
celltype.fine.score <- as.data.frame(pred.hesc.fine) %>% t() %>% as.data.frame() %>% tibble::rownames_to_column(var = "Cluster") %>% mutate(SingleR = "FineCellType")
celltype.fine$Cluster <- parse_integer(celltype.fine$Cluster)
markers <- markers %>% left_join(., celltype.fine[, c(1,159)], by=c("cluster" = "Cluster")) #Choose the first.label
colnames(markers)[12:13] <- c("SingleR_main", "SingleR_fine")
data.table::fwrite(markers, "s2_cleanSeurat_harmony_allSamples_markerGene.txt", sep = "\t")
celltype.scores <- rbind(celltype.main.score, celltype.fine.score) %>% dplyr::select(29, everything())
data.table::fwrite(celltype.scores, file = "s2_cleanSeurat_harmony_allSamples_markerGene_singleR_scores.txt", sep = "\t")sessionInfo()## R version 4.1.1 (2021-08-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] celldex_1.2.0 SingleR_1.7.1
## [3] SummarizedExperiment_1.22.0 Biobase_2.52.0
## [5] GenomicRanges_1.44.0 GenomeInfoDb_1.28.4
## [7] IRanges_2.26.0 S4Vectors_0.30.0
## [9] BiocGenerics_0.38.0 MatrixGenerics_1.4.3
## [11] matrixStats_0.61.0 clustree_0.4.3
## [13] ggraph_2.0.5 pheatmap_1.0.12
## [15] harmony_0.1.0 Rcpp_1.0.7
## [17] cowplot_1.1.1 sctransform_0.3.2
## [19] patchwork_1.1.1 magrittr_2.0.1
## [21] forcats_0.5.1 stringr_1.4.0
## [23] dplyr_1.0.7 purrr_0.3.4
## [25] readr_2.0.1 tidyr_1.1.3
## [27] tibble_3.1.4 ggplot2_3.3.5
## [29] tidyverse_1.3.1 SeuratObject_4.0.2
## [31] Seurat_4.0.4
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 reticulate_1.22
## [3] tidyselect_1.1.1 RSQLite_2.2.8
## [5] AnnotationDbi_1.54.1 htmlwidgets_1.5.4
## [7] grid_4.1.1 BiocParallel_1.26.2
## [9] Rtsne_0.15 munsell_0.5.0
## [11] ScaledMatrix_1.0.0 codetools_0.2-18
## [13] ica_1.0-2 future_1.22.1
## [15] miniUI_0.1.1.1 withr_2.4.2
## [17] colorspace_2.0-2 filelock_1.0.2
## [19] highr_0.9 knitr_1.34
## [21] rstudioapi_0.13 ROCR_1.0-11
## [23] tensor_1.5 listenv_0.8.0
## [25] labeling_0.4.2 GenomeInfoDbData_1.2.6
## [27] polyclip_1.10-0 bit64_4.0.5
## [29] farver_2.1.0 parallelly_1.28.1
## [31] vctrs_0.3.8 generics_0.1.0
## [33] xfun_0.26 BiocFileCache_2.0.0
## [35] R6_2.5.1 graphlayouts_0.7.1
## [37] rsvd_1.0.5 cachem_1.0.6
## [39] bitops_1.0-7 spatstat.utils_2.2-0
## [41] DelayedArray_0.18.0 assertthat_0.2.1
## [43] promises_1.2.0.1 scales_1.1.1
## [45] gtable_0.3.0 beachmat_2.8.1
## [47] globals_0.14.0 goftest_1.2-2
## [49] tidygraph_1.2.0 rlang_0.4.11
## [51] splines_4.1.1 lazyeval_0.2.2
## [53] spatstat.geom_2.2-2 broom_0.7.9
## [55] checkmate_2.0.0 BiocManager_1.30.16
## [57] yaml_2.2.1 reshape2_1.4.4
## [59] abind_1.4-5 modelr_0.1.8
## [61] backports_1.2.1 httpuv_1.6.3
## [63] tools_4.1.1 ellipsis_0.3.2
## [65] spatstat.core_2.3-0 jquerylib_0.1.4
## [67] RColorBrewer_1.1-2 ggridges_0.5.3
## [69] plyr_1.8.6 sparseMatrixStats_1.4.2
## [71] zlibbioc_1.38.0 RCurl_1.98-1.5
## [73] rpart_4.1-15 deldir_0.2-10
## [75] pbapply_1.5-0 viridis_0.6.1
## [77] zoo_1.8-9 haven_2.4.3
## [79] ggrepel_0.9.1 cluster_2.1.2
## [81] fs_1.5.0 data.table_1.14.0
## [83] scattermore_0.7 lmtest_0.9-38
## [85] reprex_2.0.1 RANN_2.6.1
## [87] fitdistrplus_1.1-5 hms_1.1.0
## [89] mime_0.11 evaluate_0.14
## [91] xtable_1.8-4 readxl_1.3.1
## [93] gridExtra_2.3 compiler_4.1.1
## [95] KernSmooth_2.23-20 crayon_1.4.1
## [97] htmltools_0.5.2 mgcv_1.8-36
## [99] later_1.3.0 tzdb_0.1.2
## [101] lubridate_1.7.10 DBI_1.1.1
## [103] ExperimentHub_2.0.0 tweenr_1.0.2
## [105] dbplyr_2.1.1 rappdirs_0.3.3
## [107] MASS_7.3-54 Matrix_1.3-4
## [109] cli_3.0.1 igraph_1.2.6
## [111] pkgconfig_2.0.3 plotly_4.9.4.1
## [113] spatstat.sparse_2.0-0 xml2_1.3.2
## [115] bslib_0.3.0 XVector_0.32.0
## [117] rvest_1.0.1 digest_0.6.27
## [119] RcppAnnoy_0.0.19 Biostrings_2.60.2
## [121] spatstat.data_2.1-0 rmarkdown_2.11
## [123] cellranger_1.1.0 leiden_0.3.9
## [125] uwot_0.1.10 DelayedMatrixStats_1.14.3
## [127] curl_4.3.2 shiny_1.6.0
## [129] lifecycle_1.0.0 nlme_3.1-153
## [131] jsonlite_1.7.2 BiocNeighbors_1.10.0
## [133] viridisLite_0.4.0 fansi_0.5.0
## [135] pillar_1.6.2 lattice_0.20-44
## [137] KEGGREST_1.32.0 fastmap_1.1.0
## [139] httr_1.4.2 survival_3.2-13
## [141] interactiveDisplayBase_1.30.0 glue_1.4.2
## [143] png_0.1-7 BiocVersion_3.13.1
## [145] bit_4.0.4 ggforce_0.3.3
## [147] stringi_1.7.4 sass_0.4.0
## [149] blob_1.2.2 AnnotationHub_3.0.1
## [151] BiocSingular_1.8.1 memoise_2.0.0
## [153] irlba_2.3.3 future.apply_1.8.1